Vehicle detection and tracking

This Jupyter notebook contains a pipeline to detect vehicles and track them in the image frame.

Init

In [165]:
import numpy as np
import time
import os
import copy
import glob
import random
from collections import deque
import pickle
import cv2
import matplotlib.pyplot as plt
#%matplotlib nbagg
%matplotlib inline

import matplotlib.image as mpimg
from mpl_toolkits.mplot3d import Axes3D
from scipy.ndimage.measurements import label
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import precision_recall_curve, average_precision_score
from sklearn.utils import shuffle
from sklearn.svm import LinearSVC, SVC
from sklearn import linear_model
from skimage.feature import hog
from scipy.ndimage.measurements import label

from IPython.core.display import display, HTML
# Widen the notebook's main containers so wide plots and output fit on screen.
display(HTML("""<style>
    div#notebook-container    { width: 70%; }
    div#menubar-container     { width: 70%; }
    div#maintoolbar-container { width: 70%; }
</style>"""))

# pretty formatting of np float arrays
float_formatter = lambda x: "%10.5f" % x
np.set_printoptions(formatter={'float_kind' : float_formatter})



#############################################################################
# Constants and variables
#############################################################################

# Cache for training and test features and classifier
path_cache_savefile = "./data_cache.p"

# Small dataset for testing
# NOTE(review): all dataset paths are relative to the notebook's parent
# directory -- confirm the expected folder layout before running.
path_notcar_small = "../non-vehicles_smallset/"
path_car_small    = "../vehicles_smallset/"

# Big dataset for actual usage
path_notcar_big  = "../non-vehicles/"
path_car_big     = "../vehicles/"

# Folders for input test images and written results (relative to notebook).
path_test_imgs   = "test_images/"
path_output_imgs = "output_images/" 

Helper functions

Most of these functions are adapted from the lesson material or re-used from previous projects.

In [166]:
# Draw rectangles onto a copy of an image.
def draw_boxes(img, bboxes, color=(0, 0, 255), thick=5):
    """Return a copy of `img` with every bbox ((x1, y1), (x2, y2)) outlined.

    The input image is not modified; a new annotated copy is returned.
    """
    annotated = np.copy(img)
    for top_left, bottom_right in bboxes:
        cv2.rectangle(annotated, tuple(top_left), tuple(bottom_right), color, thick)
    return annotated


def convert_color(img, colorspace="YCrCb"):
    """Convert an RGB image to the given OpenCV colorspace.

    Args:
        img: RGB image array as produced by matplotlib's imread.
        colorspace: one of "HSV", "LUV", "HLS", "YUV", "YCrCb".

    Returns:
        The converted image.

    Raises:
        ValueError: if `colorspace` is not supported. (Previously this only
        printed a warning and then crashed with UnboundLocalError because
        `feature_image` was never assigned.)
    """
    if colorspace == "HSV":
        return cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
    if colorspace == "LUV":
        return cv2.cvtColor(img, cv2.COLOR_RGB2LUV)
    if colorspace == "HLS":
        return cv2.cvtColor(img, cv2.COLOR_RGB2HLS)
    if colorspace == "YUV":
        return cv2.cvtColor(img, cv2.COLOR_RGB2YUV)
    if colorspace == "YCrCb":
        return cv2.cvtColor(img, cv2.COLOR_RGB2YCrCb)
    raise ValueError("Colorspace {} is not supported!".format(colorspace))


# Define a function to compute binned color features  
def bin_spatial(img, size=(32, 32)):
    """Downsample `img` to `size` and flatten it into a 1-D feature vector.

    Grayscale images are resized and flattened directly. Color images are
    resized per channel and the flattened channels concatenated.

    Generalized: the original hard-coded a 3-way unpack (`ch1, ch2, ch3`)
    and would raise for images with a channel count other than 3; this
    version handles any number of channels.
    """
    if len(img.shape) < 3:
        return cv2.resize(img, size).ravel()
    channels = [cv2.resize(img[:, :, i], size).ravel() for i in range(img.shape[2])]
    return np.hstack(channels)


# Compute per-channel color histogram features.
def color_hist(img, nbins=32):
    """Histogram each of the three color channels of `img` and concatenate
    the bin counts into a single 1-D feature vector of length 3 * nbins.

    NOTE(review): no explicit bins_range is passed, so each channel is
    binned over its own min/max; features therefore depend on the image's
    value range -- confirm this is intentional.
    """
    counts = [np.histogram(img[:, :, channel], bins=nbins)[0] for channel in range(3)]
    return np.concatenate(counts)


# Define a function to return HOG features and visualization
def get_hog_features(img, orient, pix_per_cell, cell_per_block, vis=False, feature_vec=True):
    """Wrap skimage.feature.hog with this project's fixed settings.

    Returns the HOG feature array; when `vis` is True, skimage's hog()
    itself returns a (features, hog_image) tuple, which is passed through
    unchanged. So a single call covers both branches of the original.
    """
    return hog(img,
               orientations=orient,
               pixels_per_cell=(pix_per_cell, pix_per_cell),
               cells_per_block=(cell_per_block, cell_per_block),
               transform_sqrt=False,
               visualise=vis,
               feature_vector=feature_vec,
               block_norm="L1")



    
# Define a function to extract features from a single image window
def single_img_features(img, color_space="RGB", spatial_size=(32, 32), hist_bins=32, orient=9, pix_per_cell=8,
                        cell_per_block=2, hog_channel=0, spatial_feat=True, hist_feat=True, hog_feat=True):
    """Build the combined feature vector (spatial + histogram + HOG) for one image.

    Args:
        img: RGB image (expected 0-255 range; scaled to 0-1 internally).
        color_space: target colorspace ("RGB" means no conversion).
        hog_channel: channel index for HOG, or -1 to use all three channels.
        spatial_feat/hist_feat/hog_feat: toggles for each feature family.

    Returns:
        1-D numpy array of the enabled features, in spatial/hist/HOG order.

    BUG FIX: the original unconditionally stacked `spatial_features`,
    `hist_features` and `hog_features`, raising NameError whenever any of
    the three flags was False. Features are now collected conditionally.
    """
    # Apply color conversion if other than 'RGB'
    if color_space != "RGB":
        feature_image = convert_color(img, color_space)
    else:
        feature_image = np.copy(img)

    # from now on we want to work with pixel values ranging from 0 to 1
    feature_image = feature_image.astype(np.float32) / 255.0

    features = []
    if spatial_feat:
        features.append(bin_spatial(feature_image, size=spatial_size).reshape(-1))
    if hist_feat:
        features.append(color_hist(feature_image, nbins=hist_bins).reshape(-1))
    if hog_feat:
        if hog_channel == -1:
            # HOG over all three channels, concatenated
            per_channel = [get_hog_features(feature_image[:, :, ch], orient, pix_per_cell,
                                            cell_per_block, vis=False, feature_vec=False)
                           for ch in range(3)]
            features.append(np.hstack(per_channel).reshape(-1))
        else:
            features.append(get_hog_features(feature_image[:, :, hog_channel], orient, pix_per_cell,
                                             cell_per_block, vis=False, feature_vec=True))

    return np.hstack(features)


# Extract features across a list of images 
def extract_features(imgs, color_space="RGB", spatial_size=(32, 32), hist_bins=32, orient=9, pix_per_cell=8,
                     cell_per_block=2, hog_channel=0, spatial_feat=True, hist_feat=True, hog_feat=True):
    """Read every image file in `imgs` and return a list of feature vectors.

    mpimg.imread yields floats in [0, 1] for PNG files, so those are
    rescaled back to uint8 [0, 255] before feature extraction.
    """
    features = []
    for path in imgs:
        image = mpimg.imread(path)
        if path.endswith(".png"):
            image = (image * 255.0).astype(np.uint8)

        features.append(single_img_features(
            image, color_space=color_space, spatial_size=spatial_size,
            hist_bins=hist_bins, orient=orient, pix_per_cell=pix_per_cell,
            cell_per_block=cell_per_block, hog_channel=hog_channel,
            spatial_feat=spatial_feat, hist_feat=hist_feat, hog_feat=hog_feat))

    return features


# Classify every candidate window of an image (windows from slide_windows()).
def search_windows(img, windows, clf, scaler, color_space='RGB', spatial_size=(32, 32), hist_bins=32, 
                    hist_range=(0, 256), orient=9, pix_per_cell=8, cell_per_block=2, 
                    hog_channel=0, spatial_feat=True, hist_feat=True, hog_feat=True):
    """Run the classifier on each window crop; return windows predicted as cars.

    Each window ((x1, y1), (x2, y2)) is cropped from `img`, resized to the
    64x64 training resolution, featurized, scaled with `scaler`, and fed to
    `clf`. Note: `hist_range` is accepted for interface compatibility but
    unused, as in the original.
    """
    hits = []
    for window in windows:
        (x1, y1), (x2, y2) = window
        patch = cv2.resize(img[y1:y2, x1:x2], (64, 64))
        feats = single_img_features(patch, color_space=color_space, spatial_size=spatial_size,
                                    hist_bins=hist_bins, orient=orient, pix_per_cell=pix_per_cell,
                                    cell_per_block=cell_per_block, hog_channel=hog_channel,
                                    spatial_feat=spatial_feat, hist_feat=hist_feat, hog_feat=hog_feat)
        scaled = scaler.transform(np.array(feats).reshape(1, -1))
        if clf.predict(scaled) == 1:
            hits.append(window)

    return hits


# Define a single function that can extract features using hog sub-sampling and make predictions
def find_cars(img, colorspace, yrange, scale, svc, X_scaler, orient, pix_per_cell, cell_per_block,
              spatial_size, hist_bins, spatial_feat=True, hist_feat=True, cells_per_step=2):
    """Slide a 64px window (scaled by `scale`) over the y-band `yrange` of `img`,
    computing HOG once for the whole band and sub-sampling it per window.

    Returns:
        (bboxes, all_bboxes): boxes classified as cars, and every box searched.
        Box coordinates are in the original image frame.

    BUG FIXES vs. original:
      * np.int (removed in NumPy 1.24) replaced with the builtin int;
      * returns empty results instead of crashing in np.concatenate when the
        search band is too small to contain any window.
    """
    image_to_search = img[yrange[0]:yrange[1], :, :]

    if colorspace != "RGB":
        ctrans_tosearch = convert_color(image_to_search, colorspace)
    else:
        ctrans_tosearch = np.copy(image_to_search)

    # work with pixel values in [0, 1], matching training-time scaling
    ctrans_tosearch = ctrans_tosearch.astype(np.float32) / 255

    if scale != 1.0:
        imshape = ctrans_tosearch.shape
        ctrans_tosearch = cv2.resize(ctrans_tosearch, (int(imshape[1] / scale), int(imshape[0] / scale)))

    ch1, ch2, ch3 = [ctrans_tosearch[:, :, i] for i in range(3)]

    # Define blocks and steps as above
    nxblocks = (ch1.shape[1] // pix_per_cell) - cell_per_block + 1
    nyblocks = (ch1.shape[0] // pix_per_cell) - cell_per_block + 1

    # 64 was the original sampling rate, with 8 cells and 8 pix per cell
    window = 64
    nblocks_per_window = (window // pix_per_cell) - cell_per_block + 1
    nxsteps = (nxblocks - nblocks_per_window) // cells_per_step
    nysteps = (nyblocks - nblocks_per_window) // cells_per_step

    # Compute individual channel HOG features once for the entire band
    hog1, hog2, hog3 = [get_hog_features(ch, orient, pix_per_cell, cell_per_block, feature_vec=False)
                        for ch in [ch1, ch2, ch3]]

    all_bboxes = []
    test_features = []
    for xb in range(nxsteps):
        for yb in range(nysteps):
            ypos = yb * cells_per_step
            xpos = xb * cells_per_step
            # Extract (sub-sample) HOG for this window
            hw1, hw2, hw3 = [h[ypos:ypos + nblocks_per_window, xpos:xpos + nblocks_per_window].ravel()
                             for h in [hog1, hog2, hog3]]
            hog_features = np.hstack((hw1, hw2, hw3))
            xleft = xpos * pix_per_cell
            ytop = ypos * pix_per_cell

            # Extract the image patch
            subimg = cv2.resize(ctrans_tosearch[ytop:ytop + window, xleft:xleft + window], (window, window))

            # Get color features
            spatial_features = bin_spatial(subimg, size=spatial_size)
            hist_features = color_hist(subimg, nbins=hist_bins)

            # Scale features; prediction is done in one batch after the loop
            feat = X_scaler.transform(np.hstack((spatial_features, hist_features, hog_features)).reshape(1, -1))
            test_features.append(feat)

            # Map the window back into original-image coordinates
            xbox_left = int(xleft * scale)
            ytop_draw = int(ytop * scale)
            win_draw = int(window * scale)
            all_bboxes.append(((xbox_left, ytop_draw + yrange[0]),
                               (xbox_left + win_draw, ytop_draw + win_draw + yrange[0])))

    # No window fit into the search band: return empty results
    if not test_features:
        empty = np.empty((0, 2, 2), dtype=int)
        return empty, empty

    # get predictions for all patches in a single batched call
    all_bboxes = np.array(all_bboxes)
    test_prediction = svc.predict(np.concatenate(test_features)).astype(np.uint8)
    bboxes = all_bboxes[test_prediction == 1]

    return bboxes, all_bboxes


def add_heat(heatmap, bbox_list):
    """Increment `heatmap` in place by 1 inside every bounding box.

    Each box takes the form ((x1, y1), (x2, y2)); the updated heatmap is
    also returned for convenience.
    """
    for (x1, y1), (x2, y2) in bbox_list:
        heatmap[y1:y2, x1:x2] += 1
    return heatmap


def draw_labeled_bboxes(img, labels, color=(0,0,255), thick=5):
    """Draw one bounding box per labelled region.

    `labels` is the (label_map, n_regions) pair returned by
    scipy.ndimage's label(); each region gets the tight box around the
    min/max x and y of its pixels.
    """
    label_map, n_cars = labels[0], labels[1]
    for car_number in range(1, n_cars + 1):
        # rows are y, columns are x
        ys, xs = np.nonzero(label_map == car_number)
        bbox = ((np.min(xs), np.min(ys)), (np.max(xs), np.max(ys)))
        img = draw_boxes(img, [bbox], color=color, thick=thick)

    return img


def heatmap_filter(image, bboxes, thres=1.0, heat_clip=(0, 255), color=(0,0,255), thick=5):
    """Turn raw detections into consolidated boxes via a thresholded heatmap.

    Args:
        image: frame the boxes refer to (only its shape is used for the map).
        bboxes: iterable of detection boxes ((x1, y1), (x2, y2)).
        thres: heat values <= thres are zeroed to suppress false positives.
        heat_clip: (min, max) clip range applied to the thresholded map.

    Returns:
        (annotated_image, heatmap) tuple.

    BUG FIX: np.float was removed in NumPy 1.20; use builtin float.
    """
    heat = np.zeros_like(image[:,:,0]).astype(float)
    # Add heat to each box in box list
    heat = add_heat(heat, bboxes)
    # Apply threshold to help remove false positives, zero out pixels below the threshold
    heat[heat <= thres] = 0
    # Visualize the heatmap when displaying    
    heatmap = np.clip(heat, *heat_clip)
    # Find final boxes from heatmap using label function
    labels = label(heatmap)
    draw_img = draw_labeled_bboxes(np.copy(image), labels, color, thick=thick)    
    return draw_img, heatmap


# Define a function to return some characteristics of the dataset 
def data_look(car_list, notcar_list):
    """Summarise the dataset: counts per class, plus shape and dtype of a
    sample image (the first car image is read to determine both)."""
    sample = np.array(mpimg.imread(car_list[0]))
    return {
        "n_cars": len(car_list),
        "n_notcars": len(notcar_list),
        "image_shape": sample.shape,
        "data_type": sample.dtype,
    }


def equalize_histogram(img, clipLimit=2.0, tileGridSize=(8,8)):
    """Contrast-enhance `img` with CLAHE (cv2.createCLAHE).

    Color images are equalized channel by channel; grayscale images are
    equalized directly. The input image is not modified.
    """
    clahe = cv2.createCLAHE(clipLimit=clipLimit, tileGridSize=tileGridSize)
    if len(img.shape) != 3:
        return clahe.apply(img)
    out = np.copy(img)
    for channel in range(img.shape[2]):
        out[:, :, channel] = clahe.apply(img[:, :, channel])
    return out

Dataset exploration

The dataset that came with this project is a small excerpt from the GTI, KITTI and Udacity datasets. It contains around 8.8k car and 8.9k non-car pictures, each 64x64 pixels in size.

In [167]:
dataset="big" # "small" or "big"

# Collect image paths recursively; the small set ships as .jpeg files,
# the full set as .png files.
if dataset == "small":
    images_cars    = glob.glob(path_car_small + "**/*.jpeg", recursive=True)
    images_notcars = glob.glob(path_notcar_small + "**/*.jpeg", recursive=True)
else:
    images_cars = glob.glob(path_car_big + "**/*.png", recursive=True)
    images_notcars = glob.glob(path_notcar_big + "**/*.png", recursive=True)

# Show a 4x8 grid of random samples: first 16 tiles cars, last 16 non-cars.
fig, axs = plt.subplots(4,8, figsize=(10, 6))
fig.subplots_adjust(hspace = 0.2, wspace = 0.05)
axs = axs.ravel()

for i in range(16):
    img = mpimg.imread(images_cars[np.random.randint(0, len(images_cars))])
    axs[i].axis("off")
    axs[i].set_title("car", fontsize=10)
    axs[i].imshow(img)
for i in range(16,32):
    img = mpimg.imread(images_notcars[np.random.randint(0, len(images_notcars))])
    axs[i].axis("off")
    axs[i].set_title("noncar", fontsize=10)
    axs[i].imshow(img)

Visualization of spatial features

In [168]:
# Read in the image
image_names = glob.glob(path_test_imgs+"*.jpg")
fig, axs = plt.subplots(2,2, figsize=(14, 10))
bboxes = [ ((400, 400), (450, 450)), ((810, 410), (860, 460)) ]

for i, (bbox, image_name) in enumerate(zip(bboxes, image_names[0:2])):
    image = mpimg.imread(image_name)
    image_box = draw_boxes(image, [bbox], color=(0, 0, 255), thick=6)
    axs[0][i].imshow(image_box)
    axs[0][i].axis("off")
    axs[0][i].set_title(image_name)
    image_small = image[bbox[0][1]:bbox[1][1], bbox[0][0]:bbox[1][0], :]
    image_small = cv2.resize(image_small, (16,16))
    axs[1][i].imshow(image_small)
    axs[1][i].set_title("Binned 16x16 pixel spatial feature")
plt.show()

Visualization of histogram features

In [169]:
# Read in the image
image_names = glob.glob(path_test_imgs+"*.jpg")

image = mpimg.imread(image_names[0])

# Take histograms in R, G, and B
rhist, ghist, bhist = [np.histogram(image[:,:,i], bins=32, range=(0, 256)) for i in range(3)]

# Generating bin centers
bin_edges = rhist[1]
bin_centers = (bin_edges[1:] + bin_edges[0:len(bin_edges)-1]) / 2

fig = plt.figure(figsize=(10,5))
plt.imshow(image)
plt.axis("off")

# Plot a figure with all three bar charts
fig = plt.figure(figsize=(16,3))
plt.subplot(131)
plt.bar(bin_centers, rhist[0])
plt.xlim(0, 256)
plt.title('R Histogram')
plt.subplot(132)
plt.bar(bin_centers, ghist[0])
plt.xlim(0, 256)
plt.title('G Histogram')
plt.subplot(133)
plt.bar(bin_centers, bhist[0])
plt.xlim(0, 256)
plt.title('B Histogram')
plt.show()

Visualizing HOG features on images

In [170]:
# Visualize HOG features for two random car / non-car pairs (2 rows x 4 cols).
fig, ax = plt.subplots(2,4, figsize=(10, 6))
fig.subplots_adjust(hspace = 0.1, wspace = 0.05)

for i in range(2):
    car_ind    = np.random.randint(0, len(images_cars))
    notcar_ind = np.random.randint(0, len(images_notcars))

    # Read in random car/not-car images
    print(images_cars[car_ind])
    print(images_notcars[notcar_ind])
    car_image = mpimg.imread(images_cars[car_ind])
    # mpimg returns floats in [0, 1] for PNGs; rescale to uint8 [0, 255]
    if images_cars[car_ind].endswith(".png"):
        car_image = car_image * 255.0
        car_image = car_image.astype(np.uint8)
    notcar_image = mpimg.imread(images_notcars[notcar_ind])
    if images_notcars[notcar_ind].endswith(".png"):
        notcar_image = notcar_image * 255.0
        notcar_image = notcar_image.astype(np.uint8)
    #car_image = equalize_histogram(car_image)
    # HOG is visualized on the grayscale image here
    car_gray = cv2.cvtColor(car_image, cv2.COLOR_RGB2GRAY)
    notcar_gray = cv2.cvtColor(notcar_image, cv2.COLOR_RGB2GRAY)
    
    # Call function with vis=True to see an image output
    # NOTE(review): cell_per_block=8 here differs from the training value (2)
    # -- presumably chosen only for a nicer visualization; confirm.
    _, hog_carimage = get_hog_features(car_gray, orient=12, pix_per_cell=8, cell_per_block=8, vis=True, feature_vec=True)
    # Call function with vis=True to see an image output
    _, hog_notcarimage = get_hog_features(notcar_gray, orient=12, pix_per_cell=8, cell_per_block=8, vis=True, feature_vec=True)

    ax[i][0].imshow(car_image)
    ax[i][0].set_title("Car Image", fontsize=10)
    ax[i][0].axis("off")
    ax[i][1].imshow(hog_carimage, cmap="gray")
    ax[i][1].set_title("Car HOG features", fontsize=10)
    ax[i][1].axis("off")
    ax[i][2].imshow(notcar_image)
    ax[i][2].set_title("Non-Car Image", fontsize=10)
    ax[i][2].axis("off")
    ax[i][3].imshow(hog_notcarimage, cmap="gray")
    ax[i][3].set_title("Non-Car HOG features", fontsize=10)
    ax[i][3].axis("off")
../vehicles/KITTI_extracted/3041.png
../non-vehicles/Extras/extra3141.png
../vehicles/KITTI_extracted/1221.png
../non-vehicles/GTI/image1134.png

Train SVM classifier on dataset

Dataset selection, exploration and feature extraction

In [171]:
dataset="big" # "small" or "big"

# Collect dataset image paths (small set is jpeg, full set is png).
if dataset == "small":
    images_cars    = glob.glob(path_car_small + "**/*.jpeg", recursive=True)
    images_notcars = glob.glob(path_notcar_small + "**/*.jpeg", recursive=True)
else:
    images_cars = glob.glob(path_car_big + "**/*.png", recursive=True)
    images_notcars = glob.glob(path_notcar_big + "**/*.png", recursive=True)


# if we already saved all features
# NOTE(review): when the cache file exists, the feature parameters used below
# come from the cache, NOT from the constants defined in the else-branch.
if os.path.isfile(path_cache_savefile):
    # Load the training and test features:
    with open(path_cache_savefile, mode="rb") as f:
        data = pickle.load(f)

    car_features     = data["cars_features"]
    notcar_features  = data["notcars_features"]
    colorspace       = data["colorspace"]
    orient           = data["orient"]
    pix_per_cell     = data["pix_per_cell"]
    cell_per_block   = data["cell_per_block"]
    hog_channel      = data["hog_channel"]
    spatial_size     = data["spatial_size"]
    hist_bins        = data["hist_bins"]
    spatial_feat     = data["spatial_feat"]
    hist_feat        = data["hist_feat"]
    hog_feat         = data["hog_feat"]
    print("Loaded parameters!")
else:
    
    # Reduce the sample size
    # NOTE(review): sample_size is currently unused -- the full dataset is
    # used because the slicing below is commented out.
    sample_size = 5000
    cars = images_cars #[0:sample_size]
    notcars = images_notcars #[0:sample_size]

    data_info = data_look(images_cars, images_notcars)
    print("Dataset contains ", data_info["n_cars"], " cars and", data_info["n_notcars"], " non-cars")
    print("of size: ", data_info["image_shape"], " and data type:", data_info["data_type"])

    ### Tweak these parameters and see how the results change.
    colorspace     = "YCrCb" # possible values: RGB, HSV, LUV, HLS, YUV, YCrCb
    orient         = 9  # HOG orientations
    pix_per_cell   = 8 # HOG pixels per cell
    cell_per_block = 2 # HOG cells per block
    hog_channel    = -1 # Can be 0, 1, 2, or -1 for all channels
    spatial_size   = (32, 32) # Spatial binning dimensions
    hist_bins      = 32    # Number of histogram bins
    spatial_feat   = True # Spatial features on or off
    hist_feat      = True # Histogram features on or off
    hog_feat       = True # HOG features on or off

    # Extract features for both classes (this is the expensive step that the
    # pickle cache above avoids on subsequent runs).
    t=time.time()
    car_features    = extract_features(cars, color_space=colorspace, spatial_size=spatial_size, hist_bins=hist_bins, 
                                       orient=orient, pix_per_cell=pix_per_cell, cell_per_block=cell_per_block, hog_channel=hog_channel)
    notcar_features = extract_features(notcars, color_space=colorspace, spatial_size=spatial_size, hist_bins=hist_bins,
                                       orient=orient, pix_per_cell=pix_per_cell, cell_per_block=cell_per_block, hog_channel=hog_channel)
    t2 = time.time()
    print("It took {:0.2f} seconds to extract features...".format(t2-t))
    
    # Save the training and test features:
    data = {
        "cars_features"    : car_features,
        "notcars_features" : notcar_features,
        "colorspace"       : colorspace,
        "orient"           : orient,
        "pix_per_cell"     : pix_per_cell,
        "cell_per_block"   : cell_per_block,
        "hog_channel"      : hog_channel,
        "spatial_size"     : spatial_size,
        "hist_bins"        : hist_bins,
        "spatial_feat"     : spatial_feat,
        "hist_feat"        : hist_feat,
        "hog_feat"         : hog_feat
    }
    with open(path_cache_savefile, mode="wb") as f:
        pickle.dump(data, f)

# Create an array stack of feature vectors
X = np.vstack((car_features, notcar_features)).astype(np.float64)
# Fit a per-column scaler
X_scaler = StandardScaler().fit(X)
# Apply the scaler to X
scaled_X = X_scaler.transform(X)

# Define the labels vector: 1 = car, 0 = not-car
y = np.hstack((np.ones(len(car_features)), np.zeros(len(notcar_features))))

# Split up data into randomized training and test sets
# NOTE(review): shuffling three times in a row is redundant -- a single
# shuffle would give an equivalent random permutation.
rand_state = np.random.randint(0, 100)
scaled_X, y = shuffle(scaled_X, y, random_state=rand_state)
rand_state = np.random.randint(0, 100)
scaled_X, y = shuffle(scaled_X, y, random_state=rand_state)
rand_state = np.random.randint(0, 100)
scaled_X, y = shuffle(scaled_X, y, random_state=rand_state)
X_train, X_test, y_train, y_test = train_test_split(scaled_X, y, test_size=0.2, random_state=rand_state)
print("Colorspace: ", colorspace, "spatial_size: ", spatial_size , "hist_bins: ", hist_bins)
print('Using:', orient, 'orientations', pix_per_cell, 'pixels per cell and', cell_per_block, 'cells per block')
print('Feature vector length:', len(X_train[0]))
Loaded parameters!
Colorspace:  YCrCb spatial_size:  (32, 32) hist_bins:  32
Using: 9 orientations 8 pixels per cell and 2 cells per block
Feature vector length: 8460

Training classifier and testing accuracy on test set

In [172]:
# Load a previously trained classifier from the cache if available,
# otherwise train a fresh LinearSVC and store it back into the cache.
# BUG FIX: `svc` and `data` were only assigned inside the cache branch, so a
# fresh run without a cache file raised NameError; initialize them up front.
svc = None
data = {}
if os.path.isfile(path_cache_savefile):
    # Load the training and test features:
    with open(path_cache_savefile, mode="rb") as f:
        data = pickle.load(f)

    if "svc" in data:
        svc = data["svc"]
        print("Loaded linear SVC classifier:")

if svc is None:
    print("Training linear SVC classifier from scratch")
    # Use a linear SVC 
    svc = LinearSVC(C=1.0)
    # Check the training time for the SVC
    t = time.time()
    svc.fit(X_train, y_train)
    t2 = time.time()
    print("It took {:0.2f} seconds to train SVC...".format(t2 - t))

    # Persist the trained classifier alongside the cached features
    data["svc"] = svc
    with open(path_cache_savefile, mode="wb") as f:
        pickle.dump(data, f)

print(svc)
Loaded linear SVC classifier:
LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0)
In [ ]:
"""
parameters = {'class_weight':(None, 'balanced'), 'C':[0.1, 1.0, 10, 100]}
svc = LinearSVC()
clf = GridSearchCV(svc, parameters, n_jobs=3)
clf.fit(X_train, y_train)
sorted(clf.cv_results_.keys())
"""
In [ ]:
"""
print(clf.cv_results_["split1_test_score"])
"""

Brief test with deep learning classifier from keras

In [ ]:
from keras.applications import InceptionV3
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Input, AveragePooling2D, Convolution2D, Dense, Dropout, Flatten, Input, MaxPooling2D, ZeroPadding2D
from keras.models import Model, Sequential

batch_size = 256
nb_epoch   = 10

# TODO: maybe InceptionV3 is a little bit heavy, I should try a smaller network...
"""
datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images
        vertical_flip=False)  # randomly flip images

# Compute quantities required for featurewise normalization
# (std, mean, and principal components if ZCA whitening is applied).
#datagen.fit(X_train_upscaled)

# create the base pre-trained model
base_model = InceptionV3(weights='imagenet', include_top=False)
# first: train only the top layers (which were randomly initialized)
# i.e. freeze all convolutional InceptionV3 layers
for layer in base_model.layers:
    layer.trainable = False

# add a global spatial average pooling layer
x = base_model.output
x = Dropout(0.5)(x)
predictions = Dense(2, activation='softmax')(x)

# this is the model we will train
model = Model(inputs=base_model.input, outputs=predictions)
model.summary()

# compile the model (should be done *after* setting layers to non-trainable)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')

# Fit the model on the batches generated by datagen.flow().
model.fit_generator(datagen.flow(X_train_upscaled, y_train, batch_size=batch_size),
                    steps_per_epoch=X_train.shape[0] // batch_size,
                    validation_data=(X_test_upscaled, y_test),
                    epochs=nb_epoch, verbose=1)
"""
In [ ]:
"""
clf = linear_model.SGDClassifier(max_iter=600, n_jobs=-1, loss="hinge")
t=time.time()
clf.fit(X_train, y_train)
t2 = time.time()
print("It took {:0.2f} seconds to train SGDClassifier...".format(t2-t))
"""
In [ ]:
"""
from sklearn.ensemble import BaggingClassifier
from sklearn.multiclass import OneVsRestClassifier
n_estimators = 5
clf2 = OneVsRestClassifier(BaggingClassifier(LinearSVC(C=1.0, class_weight="balanced"), n_jobs=-1,
                                             max_samples=1.0 / n_estimators, n_estimators=n_estimators))
t=time.time()
clf2.fit(X_train, y_train)
t2 = time.time()
print("It took {:0.2f} seconds to train BaggingClassifier...".format(t2-t))
"""
In [173]:
# Evaluate the trained classifier on the held-out test set.
# Check the score of the SVC
score = svc.score(X_test, y_test)
print("Test Accuracy of SVC = {:0.3f}".format(score))

# Check the score of the BaggingClassifier
#score = clf2.score(X_test, y_test)
#print("Test Accuracy of clf2 = {:0.3f}".format(score))

# Check the score of the SGDClassifier
#score = clf.score(X_test, y_test)
#print("Test Accuracy of SGDClassifier = {:0.3f}".format(score))
Test Accuracy of SVC = 0.946
In [174]:
# Plot the precision-recall curve for the linear SVC on the test set.
# BUG FIX: the curve was previously built from hard 0/1 predictions
# (svc.predict), which yields a degenerate PR curve and a misleading AP;
# use the SVM's continuous decision scores instead.
y_score = svc.decision_function(X_test)
average_precision    = average_precision_score(y_test, y_score)
precision, recall, _ = precision_recall_curve(y_test, y_score)

plt.step(recall, precision, color='b', alpha=0.2, where='post')
plt.fill_between(recall, precision, step='post', alpha=0.2, color='b')
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.ylim([0.0, 1.05])
plt.xlim([0.0, 1.0])
plt.title("2-class Precision-Recall curve: AP={0:0.2f}".format(average_precision))
plt.show()

Testing on example images by drawing bounding boxes around vehicles

In [176]:
# Min and max in y to search in slide_window(): one band per search scale.
y_start_stop = [[400, 580], [400, 650], [400, 720]]
scales       =  [1.2      , 1.6       , 2.0      ]
colors       =  [(255,0,0), (0,255,0) , (0,0,255)]
cells_per_steps = [3 , 3 , 4]

image_names = glob.glob(path_test_imgs+"*.jpg")

image = mpimg.imread(image_names[0])
# BUG FIX: this previously tested `image_name`, a stale (or undefined on a
# fresh kernel) variable, instead of the file actually loaded above.
if image_names[0].endswith(".png"):
    image = image * 255.0
    image = image.astype(np.uint8)

# Draw every searched window (not just detections) for each scale/band.
out_img_all = np.copy(image)
for cells_per_step, color, scale, y_range in zip(cells_per_steps, colors, scales, y_start_stop):
    bboxes, all_bboxes = find_cars(out_img_all, colorspace, y_range, scale, svc, X_scaler, orient, pix_per_cell, cell_per_block,
                                   spatial_size, hist_bins, spatial_feat=True, hist_feat=True, cells_per_step=cells_per_step)
    out_img_all = draw_boxes(bboxes=all_bboxes, img=out_img_all, color=color, thick=3)

fig = plt.figure(figsize=(12,10))
plt.title("All windows")
plt.imshow(out_img_all)
plt.show()
In [177]:
# Run the full multi-scale detection + heatmap filtering on every test image.
for image_name in image_names:
    image = mpimg.imread(image_name)
    # mpimg returns floats in [0, 1] for PNGs; rescale to uint8 [0, 255]
    if image_name.endswith(".png"):
        image = image * 255.0
        image = image.astype(np.uint8)

    out_img = np.copy(image)
    out_img_all = np.copy(image)
    bboxes_list = []
    time1 = time.time()
    # One pass per (scale, y-band) configuration defined above.
    for i, (cells_per_step, color, scale, y_range) in enumerate(zip(cells_per_steps, colors, scales, y_start_stop)):
        bboxes, all_bboxes = find_cars(out_img, colorspace, y_range, scale, svc, X_scaler, orient, pix_per_cell, cell_per_block,
                                       spatial_size, hist_bins, spatial_feat=True, hist_feat=True, cells_per_step=cells_per_step)
        print("scale %.1f: %d boxes total (%d detections)" % (scale, len(all_bboxes), len(bboxes)))
        bboxes_list.extend(bboxes)
        out_img = draw_boxes(bboxes=bboxes, img=out_img, color=color, thick=2)    
        out_img_all = draw_boxes(bboxes=all_bboxes, img=out_img_all, color=color, thick=3)    

    # Consolidate overlapping detections and suppress false positives.
    out_img, _ = heatmap_filter(out_img, bboxes_list, thres=3.5, heat_clip=(0, 255), color=(238,130,238), thick=8)
    time2 = time.time()
    print("It took {:.2f}s to process picture".format(time2-time1))
    fig, axs = plt.subplots(1, 2, figsize=(18,14))
    fig.subplots_adjust(hspace = .004, wspace=.008)
    axs[0].set_title("Found windows")
    axs[0].imshow(out_img)
    axs[0].axis("off")
    axs[1].set_title("All windows")
    axs[1].imshow(out_img_all)
    axs[1].axis("off")
    plt.show()
scale 1.2: 123 boxes total (1 detections)
scale 1.6: 90 boxes total (0 detections)
scale 2.0: 54 boxes total (0 detections)
It took 0.64s to process picture
scale 1.2: 123 boxes total (22 detections)
scale 1.6: 90 boxes total (8 detections)
scale 2.0: 54 boxes total (3 detections)
It took 0.68s to process picture
scale 1.2: 123 boxes total (22 detections)
scale 1.6: 90 boxes total (17 detections)
scale 2.0: 54 boxes total (6 detections)
It took 0.66s to process picture
scale 1.2: 123 boxes total (2 detections)
scale 1.6: 90 boxes total (1 detections)
scale 2.0: 54 boxes total (1 detections)
It took 0.64s to process picture
scale 1.2: 123 boxes total (18 detections)
scale 1.6: 90 boxes total (8 detections)
scale 2.0: 54 boxes total (8 detections)
It took 0.64s to process picture
scale 1.2: 123 boxes total (11 detections)
scale 1.6: 90 boxes total (5 detections)
scale 2.0: 54 boxes total (4 detections)
It took 0.67s to process picture

Showing heat map

In [178]:
# Same pipeline as the previous cell, but displaying the heatmap instead of
# the all-windows overlay.
# NOTE(review): this duplicates the detection loop above almost verbatim --
# a shared helper function would avoid the copy-paste.
for image_name in image_names:
    image = mpimg.imread(image_name)
    # mpimg returns floats in [0, 1] for PNGs; rescale to uint8 [0, 255]
    if image_name.endswith(".png"):
        image = image * 255.0
        image = image.astype(np.uint8)

    out_img = np.copy(image)
    out_img_all = np.copy(image)
    bboxes_list = []
    time1 = time.time()
    for i, (cells_per_step, color, scale, y_range) in enumerate(zip(cells_per_steps, colors, scales, y_start_stop)):
        bboxes, all_bboxes = find_cars(out_img, colorspace, y_range, scale, svc, X_scaler, orient, pix_per_cell, cell_per_block,
                                       spatial_size, hist_bins, spatial_feat=True, hist_feat=True, cells_per_step=cells_per_step)
        print("scale %.1f: %d boxes total (%d detections)" % (scale, len(all_bboxes), len(bboxes)))
        bboxes_list.extend(bboxes)
        out_img = draw_boxes(bboxes=bboxes, img=out_img, color=color, thick=2)    
        out_img_all = draw_boxes(bboxes=all_bboxes, img=out_img_all, color=color, thick=3)    

    # Keep the heatmap this time so it can be displayed alongside the result.
    out_img, heatmap = heatmap_filter(out_img, bboxes_list, thres=3.5, heat_clip=(0, 255), color=(238,130,238), thick=8)
    time2 = time.time()
    print("It took {:.2f}s to process picture".format(time2-time1))
    fig, axs = plt.subplots(1, 2, figsize=(18,14))
    fig.subplots_adjust(hspace = .004, wspace=.008)
    axs[0].set_title("Found windows")
    axs[0].imshow(out_img)
    axs[0].axis("off")
    axs[1].set_title("Heat map")
    axs[1].imshow(heatmap, cmap='hot')
    axs[1].axis("off")
    plt.show()
scale 1.2: 123 boxes total (1 detections)
scale 1.6: 90 boxes total (0 detections)
scale 2.0: 54 boxes total (0 detections)
It took 0.64s to process picture
scale 1.2: 123 boxes total (22 detections)
scale 1.6: 90 boxes total (8 detections)
scale 2.0: 54 boxes total (3 detections)
It took 0.65s to process picture
scale 1.2: 123 boxes total (22 detections)
scale 1.6: 90 boxes total (17 detections)
scale 2.0: 54 boxes total (6 detections)
It took 0.67s to process picture
scale 1.2: 123 boxes total (2 detections)
scale 1.6: 90 boxes total (1 detections)
scale 2.0: 54 boxes total (1 detections)
It took 0.63s to process picture
scale 1.2: 123 boxes total (18 detections)
scale 1.6: 90 boxes total (8 detections)
scale 2.0: 54 boxes total (8 detections)
It took 0.65s to process picture
scale 1.2: 123 boxes total (11 detections)
scale 1.6: 90 boxes total (5 detections)
scale 2.0: 54 boxes total (4 detections)
It took 0.65s to process picture

Running vehicle detection on videos

In [ ]:
# Import everything needed to edit/save/watch video clips
from moviepy.editor import VideoFileClip
from IPython.display import HTML
from moviepy.editor import *

# Ring buffer of raw detections accumulated across recent video frames;
# heatmap_filter() integrates over it so one-frame false positives are
# rejected while persistent detections survive the threshold.
# NOTE(review): maxlen counts individual boxes, not frames — each
# extend(bboxes) may push more (or fewer) than one box per scale, so the
# buffer can effectively span fewer than nr_frames frames. Confirm this
# sizing is intentional.
nr_frames = 20
boxes_queue = deque(maxlen=len(scales)*nr_frames)

# Where annotated result videos are written (created on demand below).
video_output_folder = "videos_output/"
# Input videos to process; swap in the short test clip for quick checks.
videos = [
   "project_video.mp4",
   #"test_video.mp4"
]


def process_image(image):
    """Detect vehicles in a single video frame.

    Runs the sliding-window search at every configured (scale, ROI,
    step) combination, accumulates the positive detections in the
    cross-frame ``boxes_queue`` ring buffer, and draws the
    heat-map-filtered bounding boxes onto a copy of the frame.

    Parameters
    ----------
    image : ndarray
        Color video frame as delivered by moviepy's ``fl_image``.

    Returns
    -------
    ndarray
        Copy of the input frame with filtered vehicle boxes drawn on it.
    """
    out_img = np.copy(image)

    # Only the positive detections (first return value) matter here; the
    # full window grid returned by find_cars is ignored.
    for cells_per_step, color, scale, y_range in zip(cells_per_steps, colors, scales, y_start_stop):
        bboxes, _ = find_cars(out_img, colorspace, y_range, scale, svc, X_scaler, orient,
                              pix_per_cell, cell_per_block, spatial_size, hist_bins,
                              spatial_feat=True, hist_feat=True, cells_per_step=cells_per_step)
        boxes_queue.extend(bboxes)

    # Threshold the heat accumulated over the last frames: a pixel must be
    # covered in roughly half the buffered frames to count as a vehicle.
    out_img, heatmap = heatmap_filter(out_img, boxes_queue, thres=nr_frames/2,
                                      heat_clip=(0, 255), color=(255,0,0))
    return out_img


for video in videos:
    if not os.path.exists(video_output_folder):
        os.makedirs(video_output_folder)
    result_path = video_output_folder + video
    if not os.path.isfile(video):
        print("Video %s doesn't exist!")
    else:
        clip1 = VideoFileClip(video) #.subclip(10,20)
        white_clip = clip1.fl_image(process_image) #NOTE: this function expects color images!!
        %time white_clip.write_videofile(result_path, audio=False)

Saving videos as animated gifs

In [ ]:
videos = [
   "project_video.mp4",
   #"test_video.mp4"
]

video_fps = 10
clip_part = (30.0, 35.0)
resize_factor = 0.5

if not os.path.exists(video_output_folder):
    print("Output folder does not exist?!?")
for video in videos:
    result_path = video_output_folder + video
    if not os.path.isfile(result_path):
        print("Video %s doesn't exist!" % result_path)
        continue
    else:
        clip1 = VideoFileClip(result_path).subclip(*clip_part).resize(resize_factor)
        video_slowdown_factor = video_fps / clip1.fps
        clip1 = clip1.fx(vfx.speedx, video_slowdown_factor)
        %time clip1.write_gif(result_path+".gif", fps=video_fps)